//
// Project: Disagreement in science: Missing women



clear all
version 15.1


//
// configuration macros used by every regression below

// outcome variable: the female indicator to use
local female female_genderize

// sample restriction: author gender is known (outcome is not system-missing)
local known_gender "female_genderize!=."

// AER field controls: one factor-variable term i.field_<name> per field dummy
local field_controls_aer
foreach fld in microeconomics theory macroeconomics labor econometrics io international finance public health_urban development history lab other {
	local field_controls_aer `field_controls_aer' i.field_`fld'
}
// collapse to single blanks so the macro holds a clean space-separated list
local field_controls_aer : list retokenize field_controls_aer

// Nature field controls: one factor-variable term per field dummy
local field_controls_nature
foreach fld in earth physical social biological health {
	local field_controls_nature `field_controls_nature' i.`fld'
}
local field_controls_nature : list retokenize field_controls_nature

// PNAS field controls: a single categorical variable
local field_controls_pnas i.type_num


//
// Analysis

// running model index: suffixes every stored result (m#, n#, rsq#, a#, p#_k);
// it is incremented before each regression, so the first model is number 1
local i = 0

//
// AER

// load data
use "${data}/output/aer_data_gender.dta", clear
// drop May observations outside 2019 (exclude AEA papers and proceedings) and all of 2020
drop if (month=="May" & year!=2019) | year==2020
keep if comment | research_article

// analysis: the same regression is run twice,
// spec 0 = no field controls, spec 1 = with AER field controls
local controls0
local controls1 `field_controls_aer'
forvalues spec = 0/1 {
	local ++i
	// columns of r(table) whose p-values are kept
	// NOTE(review): column 2 is presumably the 1.comment coefficient, since i.comment is listed first -- confirm
	local coefficients`i' "2"
	regress `female' i.comment i.year `controls`spec'' if `known_gender', vce(robust)
	// store the coefficient table, sample size and R-squared of this model
	matrix m`i' = r(table)
	scalar n`i' = e(N)
	scalar rsq`i' = e(r2)
	// p-values (row 4 of the stored coefficient table)
	foreach k of numlist `coefficients`i'' {
		local p`i'_`k' = m`i'[4,`k']
	}
	// margins for each level of comment, stored as a#
	margins comment
	matrix a`i' = r(b)
}



//
// Nature

// call data
// load data
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// attach field information and keep only observations matched in both files
merge m:1 article_id using "${data}/output/nature_fields_matched_ids.dta"
keep if _merge==3

// analysis: the same regression is run twice,
// spec 0 = no field controls, spec 1 = with Nature field controls;
// the model index i continues from the preceding section
local controls0
local controls1 `field_controls_nature'
forvalues spec = 0/1 {
	local ++i
	// columns of r(table) whose p-values are kept
	// NOTE(review): column 2 is presumably the 1.comment coefficient, since i.comment is listed first -- confirm
	local coefficients`i' "2"
	regress `female' i.comment i.year `controls`spec'' if `known_gender', vce(robust)
	// store the coefficient table, sample size and R-squared of this model
	matrix m`i' = r(table)
	scalar n`i' = e(N)
	scalar rsq`i' = e(r2)
	// p-values (row 4 of the stored coefficient table)
	foreach k of numlist `coefficients`i'' {
		local p`i'_`k' = m`i'[4,`k']
	}
	// margins for each level of comment, stored as a#
	margins comment
	matrix a`i' = r(b)
}



//
// PNAS 

// call data
// load data
use "${data}/output/pnas_data_gender.dta", clear
// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
drop if year==2020 | year<2008  // PNAS started comments in 2008
keep if comment | research_article

// numeric field code derived from the string variable type
generate type_num = .
replace type_num = 1 if type=="Biological Sciences"
replace type_num = 2 if type=="Physical Sciences"
replace type_num = 3 if type=="Social Sciences"

// generate field information for comments based on article-comment links:
// every observation with a non-missing call_to takes the mean type_num of the
// observations whose article_id equals that call_to value (missing if none match).
// The loop index is deliberately NOT named i, so the model counter i survives the loop.
local n_obs = _N
forvalues row = 1/`n_obs' {
	if call_to[`row'] != . {
		local linked_id = call_to[`row']
		// meanonly: only r(mean) is needed; quietly: avoid one log table per row
		quietly summarize type_num if article_id==`linked_id', meanonly
		// "in" addresses the row directly instead of scanning all observations with _n==
		quietly replace type_num = r(mean) in `row'
	}
}

// analysis: the same regression is run twice, restricted to observations with a field code,
// spec 0 = no field controls, spec 1 = with PNAS field controls;
// the model index i continues from the preceding section (these are models 5 and 6)
local controls0
local controls1 `field_controls_pnas'
forvalues spec = 0/1 {
	local ++i
	// columns of r(table) whose p-values are kept
	// NOTE(review): column 2 is presumably the 1.comment coefficient, since i.comment is listed first -- confirm
	local coefficients`i' "2"
	regress `female' i.comment i.year `controls`spec'' if `known_gender' & type_num!=., vce(robust)
	// store the coefficient table, sample size and R-squared of this model
	matrix m`i' = r(table)
	scalar n`i' = e(N)
	scalar rsq`i' = e(r2)
	// p-values (row 4 of the stored coefficient table)
	foreach k of numlist `coefficients`i'' {
		local p`i'_`k' = m`i'[4,`k']
	}
	// margins for each level of comment, stored as a#
	margins comment
	matrix a`i' = r(b)
}



//
// create significance stars for regressions above
// For each of the six models and each tracked coefficient, store a string
// scalar st<model>_<coef> holding the significance stars of its p-value:
//   p < 0.001 -> "****", p < 0.01 -> "***", p < 0.05 -> "**", p < 0.1 -> "*",
//   anything else (including a missing p-value) -> ""
forvalues j = 1/6 {
	foreach k of numlist `coefficients`j'' {
		local pval `p`j'_`k''
		// thresholds are tested from the strictest upward, so the first hit wins
		if `pval' < 0.001 {
			scalar st`j'_`k' = "****"
		}
		else if `pval' < 0.01 {
			scalar st`j'_`k' = "***"
		}
		else if `pval' < 0.05 {
			scalar st`j'_`k' = "**"
		}
		else if `pval' < 0.1 {
			scalar st`j'_`k' = "*"
		}
		else {
			scalar st`j'_`k' = ""
		}
	}
}




